Q3

Lee Xiao Qi https://example.com/norajones (School of Computing and Information Systems (SMU)) https://example.com/spacelysprokets
2022-05-25

The code chunk below installs any required packages that are missing and then loads all of them into the R session.

# Packages used throughout this analysis (each listed once).
packages <- c(
  "tidyverse", "treemap", "ggrepel", "lubridate", "gapminder", "gganimate",
  "ggiraph", "plotly", "zoo", "tmap", "sf", "trelliscopejs", "hrbrthemes",
  "transformr", "clock", "sftime", "rmarkdown", "data.table"
)

# Install any package that is not yet available, then attach it.
# requireNamespace() only checks availability (it does not attach anything),
# and library() stops with an error if a package still cannot be loaded.
for (pkg in packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

The code chunk below imports Employers.csv, Buildings.csv, Jobs.csv, CheckinJournal.csv and Participants.csv from the data folder into R using read_csv() and saves each of them as a tibble data frame.

# Load the raw input tables from the data folder; read_csv() returns tibbles.
Employers    <- read_csv(file = "data/Employers.csv")
Buildings    <- read_csv(file = "data/Buildings.csv")
Jobs         <- read_csv(file = "data/Jobs.csv")
Checkin      <- read_csv(file = "data/CheckinJournal.csv")
Participants <- read_csv(file = "data/Participants.csv")

# Rename venueId to employerId so the check-ins can later be merged with the
# employer table by a common key.
# NOTE(review): this renames the column for every venue type, not only for
# workplaces - presumably only the workplace rows are used afterwards.
Checkin <- Checkin %>%
  rename(employerId = venueId)

# Extract the calendar date from the timestamp
Checkin$Date <- as.Date(Checkin$timestamp)

# Keep only the rows whose venue type matches "Workplace"
Workplace_Checkin <- Checkin[grep("Workplace", Checkin$venueType), ]

# Assign a running week number: 1 for 2022-03-01 to 2022-03-07, 2 for the
# following seven days, and so on.
# The week is computed with pure Date arithmetic. Comparing the Date column
# against strptime() (a local-time POSIXlt) made the result depend on the
# session time zone: sessions east of UTC produced the numbering above, while
# sessions at or west of UTC shifted every week boundary by one day.
Workplace_Checkin <- Workplace_Checkin %>%
  mutate(Week_Num = as.numeric(Date - as.Date("2022-03-01")) %/% 7 + 1)

# Count the distinct employees who checked in at each employer in each week
Count_Checkin <- Workplace_Checkin %>%
  group_by(Week_Num, employerId) %>%
  summarise(Num_of_Employees = n_distinct(participantId)) %>%
  ungroup()

# Week-over-week percentage change in headcount, computed per employer.
# lag() looks at the previous row of the same employer, i.e. the previous week
# in which that employer had any check-in (rows are assumed to be in ascending
# Week_Num order as produced by the summarise() above - TODO confirm).
# Only Perc_Chg is back-filled with 0 (an employer's first week has no
# predecessor); other columns are left untouched, and the result is ungrouped
# so later steps do not inherit the per-employer grouping.
Count_Checkin <- Count_Checkin %>%
  group_by(employerId) %>%
  mutate(
    Perc_Chg = round(
      (Num_of_Employees - lag(Num_of_Employees)) / lag(Num_of_Employees) * 100,
      2
    ),
    Perc_Chg = ifelse(is.na(Perc_Chg), 0, Perc_Chg)
  ) %>%
  ungroup()

# Attach the employer attributes to the weekly headcounts
Count_Checkin <- merge(Count_Checkin, Employers, by = "employerId")

# write_csv() does not create missing folders, so make sure the output
# directory exists before writing (no-op when it is already there).
dir.create("data/csv", recursive = TRUE, showWarnings = FALSE)
write_csv(Count_Checkin, "data/csv/Count_Checkin.csv")

# Read the file back as a simple-features object; the geometry is built from
# the `location` column.
Count_Checkin_sf <- read_sf("data/csv/Count_Checkin.csv",
                            options = "GEOM_POSSIBLE_NAMES=location")

# Convert the columns used for plotting back to numeric
# (presumably they come back from the CSV round-trip as text - confirm).
Count_Checkin_sf$Num_of_Employees <- as.numeric(Count_Checkin_sf$Num_of_Employees)
Count_Checkin_sf$Week_Num <- as.numeric(Count_Checkin_sf$Week_Num)

# For every participant and week, count the distinct employers they checked in
# at, then keep only the participant-weeks with more than one employer.
Change_Job <- Workplace_Checkin %>%
  group_by(participantId, Week_Num) %>%
  summarise(Num_of_Employers = n_distinct(employerId)) %>%
  ungroup() %>%
  filter(Num_of_Employers > 1)
  
# Add the participant attributes to each multi-employer record
Change_Job <- merge(x = Change_Job, y = Participants, by = "participantId")

# Read the buildings and employers as simple-features objects, taking the
# geometry from the `location` column of each file.
buildings <- read_sf(
  "data/Buildings.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)
employers <- read_sf(
  "data/Employers.csv",
  options = "GEOM_POSSIBLE_NAMES=location"
)

# Enrich the (non-spatial) employer table with its building's attributes
Employers <- left_join(Employers, Buildings, by = "buildingId")

Next, we perform a full outer join of Employers and Jobs on employerId to get a complete overview of all the jobs offered by each employer.

# Full outer join: all = TRUE keeps rows from both tables even without a match
Employers <- merge(Employers, Jobs, by = "employerId", all = TRUE)
# Wrap the data in a shared key so the chart and the table below are linked
d <- highlight_key(Change_Job)

# Bar chart of participant-weeks with more than one employer,
# one panel per education level
p1 <- ggplot(d, aes(x = as.factor(Num_of_Employers), fill = educationLevel)) +
  geom_bar() +
  facet_wrap(~educationLevel) +
  ggtitle("Participants with >1 Employers") +
  xlab("Numbers of Employers") +
  ylab("No. of\nParticipants") +
  theme(
    axis.title.y = element_text(angle = 0),
    axis.ticks.x = element_blank(),
    axis.line = element_line(color = "grey")
  )

# Convert to plotly and highlight on box/lasso selection
gg <- highlight(ggplotly(p1), "plotly_selected")

# Stack the chart and the data table as two full-width (12/12) columns.
# `class = "display"` is a datatable() styling argument; it used to sit outside
# the datatable() call, where bscols() treated it as a third piece of content
# and rendered the literal text "display" on the page.
crosstalk::bscols(
  gg,
  DT::datatable(d, rownames = FALSE, class = "display"),
  widths = c(12, 12)
)
display
# Switch tmap to its interactive "view" mode
tmap_mode("view")

# Layers, bottom to top: building footprints, employer locations (red dots),
# and bubbles sized by the number of employees that checked in.
tm_shape(buildings) +
  tm_polygons(col = "grey", size = 1, border.col = "black", border.lwd = 1) +
  tm_shape(employers) +
  tm_dots(col = "red") +
  tm_shape(Count_Checkin_sf) +
  tm_bubbles(size = "Num_of_Employees", col = "lightblue")
# Animated map: buildings as the backdrop, employer check-in counts drawn on
# top (colour and size both mapped to Num_of_Employees), one animation state
# per Week_Num value.
ggplot() +
  geom_sf(data = buildings) +
  geom_sf(
    data = Count_Checkin_sf,
    mapping = aes(color = Num_of_Employees, size = Num_of_Employees),
    alpha = 0.3
  ) +
  scale_color_distiller(palette = "YlOrRd", trans = "reverse") +
  theme_void() +
  transition_states(Week_Num, transition_length = 0.5, state_length = 2)

yu